import requests
from lxml import etree
import time

# Crawl the xinpianchang.com "pick" article listing page by page and print one
# line per article card: running index, title, play, hot, duration, author.

MAX_PAGES = 100       # highest listing page number to request
MAX_RETRIES = 3       # attempts per page before the page is skipped
RETRY_DELAY = 3       # seconds to wait between attempts on the same page
REQUEST_TIMEOUT = 10  # seconds; keeps a stalled connection from hanging the run

# The headers never change between requests, so build them once.
# NOTE(review): the Cookie below embeds what looks like a personal session
# token ("Authorization=..."). Hard-coded credentials should be loaded from the
# environment or a config file kept out of version control - confirm and move.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Cookie": "Device_ID=81d6d2d1-fd93-42ae-8bd6-d8e9a06fd61e; Authorization=3C66BECDFFF02C90AFFF024BF1FFF028CEDFFF023002AE232C6D; Hm_lvt_446567e1546b322b726d54ed9b5ad346=1729671750,1729731335; HMACCOUNT=DF40E51DB7E40016; Hm_lpvt_446567e1546b322b726d54ed9b5ad346=1729734224; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2214576330%22%2C%22first_id%22%3A%22191e067685c15-0955794af05687-26001151-2073600-191e067685d54f%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_utm_source%22%3A%22xpcWeb%22%2C%22%24latest_utm_medium%22%3A%22navigatorCate%22%7D%2C%22%24device_id%22%3A%22191e067685c15-0955794af05687-26001151-2073600-191e067685d54f%22%7D",
}


def _first(values, default=""):
    """Return the first item of *values*, or *default* when it is empty."""
    return values[0] if values else default


def _fetch_cards(url):
    """Download *url* and return the matching article card elements.

    Returns an empty list when the request fails, the body cannot be parsed,
    or no card matches, so the caller can treat all three as "no data".
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"request failed: {exc}")
        return []
    root = etree.HTML(response.content.decode())
    if root is None:  # lxml yields None for an empty document
        return []
    return root.xpath("//div[@class='sc-7a811143-0 eVXfIM']")


counter = 0
for page in range(1, MAX_PAGES + 1):
    # The first page has no "-pp<N>" suffix.
    if page == 1:
        url = "https://www.xinpianchang.com/discover/article-0-0-all-all-0-0-pick"
    else:
        url = f"https://www.xinpianchang.com/discover/article-0-0-all-all-0-0-pick-pp{page}"

    # Retry an empty page a bounded number of times; retrying without a limit
    # would loop forever on a page that never returns data.
    div_list = []
    for attempt in range(1, MAX_RETRIES + 1):
        div_list = _fetch_cards(url)
        print(len(div_list))
        if div_list:
            break
        print("没有数据")
        if attempt < MAX_RETRIES:
            time.sleep(RETRY_DELAY)
    if not div_list:
        print(f"skipping page {page} after {MAX_RETRIES} attempts")
        continue

    for div in div_list:
        counter += 1
        title = _first(div.xpath(".//h2/text()"))
        # The stats list holds several <span> texts; positions 1 and 3 are the
        # ones used as play and hot. Fall back to "" if the list is shorter.
        play_hot = div.xpath(".//ul[@class='sc-8f6caae3-0 gzlhEC']/li/span/text()")
        play = play_hot[1] if len(play_hot) > 1 else ""
        hot = play_hot[3] if len(play_hot) > 3 else ""
        duration = _first(div.xpath(".//div[@class='text-xs']/text()"))
        # Partial class match: the author element carries additional classes.
        author = _first(div.xpath(".//div[contains(@class,'text-gray-800')]/text()"))
        print(counter, title, play, hot, duration, author)
